rm(list = ls())

## 

library(tidyverse)
library(lfe)

##

civey <- read_rds('data/civey.rds') %>% 
  mutate(runvar = 10000 - pop_dec_09_noise_added)

## Complete obs

df_use <- civey[complete.cases(civey[, c("budget_surplus_surplus_invest", 
                                         'pop_dec_09_noise_added', 
                                         'treated')]), ]

## Source helper function to tidy rdrobust output

source("code/tidy_rd.R")

## RDD w/o covars

out <- rdrobust(y = df_use %>% pull(budget_surplus_surplus_invest), 
                x = df_use$runvar, 
                c = 0)

## Return this : Robust B-C SE
out_df_rdd <- out %>%
  tidy_rd(3) %>% 
  mutate(method = "RDD") %>% 
  mutate(conf.low90 = estimate - qnorm(0.95) * std.error,
         conf.high90 = estimate + qnorm(0.95) * std.error)

## OLS with full sample 
ols_df <- df_use %>% 
  filter(pop_between_5k_and_15k == 1)

## Declare outcome

f1 <- paste('budget_surplus_surplus_invest~treated | 0 | 0 | 0 ') %>% as.formula()
f2 <- paste('budget_surplus_surplus_invest~treated | state | 0 | 0 ') %>% as.formula()
f3 <- paste('budget_surplus_surplus_invest~treated | county_id | 0 | 0 ') %>% as.formula()

m1 <- felm(f1, data = ols_df)
m2 <- felm(f2, data = ols_df)
m3 <- felm(f3, data = ols_df)

mlist <- list(m1, m2, m3)
n_list <- mlist %>% sapply(function(x) length(x$residuals))

## Return

out_df_ols <- list(m1, m2, m3) %>% 
  lapply(broom::tidy, conf.int = T) %>% reduce(rbind) %>% 
  filter(str_detect(term, 'treated')) %>% 
  mutate(outcome = "budget_surplus_surplus_invest", 
         method = c('OLS (no FE)',
                    'OLS (State FE)',
                    'OLS (County FE)'),
         n = n_list,
         exclude_state = 'Full sample') %>% 
  mutate(conf.low90 = estimate - qnorm(0.95) * std.error,
         conf.high90 = estimate + qnorm(0.95) * std.error)

## Combine and return
out_full <- bind_rows(out_df_rdd, out_df_ols)
out_full <- out_full %>% 
  mutate(method = factor(method)) %>% 
  mutate(method = fct_rev(method))

# Figure A.20: Effect on demand for investment ----
# Note that the last estimate in this figure will not be 100% the same as in the paper
# Since we added noise to the pre-census population to avoid identifying respondents

p1 <- ggplot(out_full, aes(method, estimate)) +
  geom_hline(yintercept = 0, linetype = 'dotted') +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
                width = 0, size = 0.6) +
  geom_errorbar(aes(ymin = conf.low90, ymax = conf.high90), 
                width = 0, size = 1.05) +
  geom_point(shape = 21, fill = 'white', size = 2) +
  xlab('Model') + ylab('Effect on demand for public investment') +
  theme_bw() +
  coord_flip() 
p1
